import config
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

class model_qianwen_local(metaclass=config.SingletonMeta):
    """Singleton wrapper around a locally hosted Qwen-14B-Chat-Int4 model.

    Loads the model path from the application config (the first enabled
    ``qwen_14b_chat_int4`` entry under ``models.llms``), then instantiates
    the HuggingFace causal-LM and its tokenizer once for the process
    lifetime (enforced by ``config.SingletonMeta``).
    """

    def __init__(self) -> None:
        super().__init__()
        if torch.cuda.is_available():
            print(f"CUDA device name: {torch.cuda.get_device_name(0)}")

        # Locate the enabled Qwen entry in the config; stop at the first match.
        self._qianwen_path: str | None = None
        for llm in config.get_config().models.llms:
            if llm.name == "qwen_14b_chat_int4" and llm.enable == 1:
                self._qianwen_path = llm.path
                break

        # Fail fast with a clear message instead of an AttributeError later
        # when from_pretrained() would receive an unset path.
        if self._qianwen_path is None:
            raise RuntimeError(
                "No enabled 'qwen_14b_chat_int4' entry found in config models.llms"
            )

        # device_map='auto' lets accelerate place weights across available devices;
        # trust_remote_code is required because Qwen ships custom modeling code.
        self._model = AutoModelForCausalLM.from_pretrained(
            pretrained_model_name_or_path=self._qianwen_path,
            device_map='auto',
            trust_remote_code=True,
        ).eval()
        self._tokenizer = AutoTokenizer.from_pretrained(
            pretrained_model_name_or_path=self._qianwen_path,
            trust_remote_code=True,
        )

    def completion(self, prompt_system: str, prompt_user: str) -> str:
        """Run a single stateless chat turn.

        Args:
            prompt_system: System prompt steering the model's behavior.
            prompt_user: The user query.

        Returns:
            The model's text response. Conversation history is discarded
            (``history=None`` on every call), so each call is independent.
        """
        response, _history = self._model.chat(
            self._tokenizer, system=prompt_system, query=prompt_user, history=None
        )
        return response